From fd8719a93192f01e88303d3b5f42619aa04fed90 Mon Sep 17 00:00:00 2001 From: "kaf24@firebug.cl.cam.ac.uk" Date: Tue, 28 Mar 2006 18:43:30 +0100 Subject: [PATCH] Clean up and fix VCPU hotplug and SMP save/restore. 1. No longer hold xenbus_lock while taking down VCPUs in SMP suspend path. This allows block device hotplug to continue working and so we will not deadlock on paging in userspace hotplug code. 2. Track xenbus and local-admin permitted cpumasks for VCPUs to bring online. So, if a local admin takes a CPU down, that won't surprisingly get overridden next time the kernel interrogates xenstore. Signed-off-by: Keir Fraser --- .../drivers/xen/core/reboot.c | 107 +++------------ .../drivers/xen/core/smpboot.c | 123 +++++++++++++++++- .../drivers/xen/pciback/xenbus.c | 2 +- .../drivers/xen/xenbus/xenbus_xs.c | 29 ++++- linux-2.6-xen-sparse/include/xen/gnttab.h | 3 + linux-2.6-xen-sparse/include/xen/xenbus.h | 9 ++ 6 files changed, 176 insertions(+), 97 deletions(-) diff --git a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c index e63e40465d..f0f1d803a5 100644 --- a/linux-2.6-xen-sparse/drivers/xen/core/reboot.c +++ b/linux-2.6-xen-sparse/drivers/xen/core/reboot.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #if defined(__i386__) || defined(__x86_64__) @@ -76,31 +77,24 @@ static int shutting_down = SHUTDOWN_INVALID; static void __shutdown_handler(void *unused); static DECLARE_WORK(shutdown_work, __shutdown_handler, NULL); -#ifndef CONFIG_HOTPLUG_CPU -#define cpu_down(x) (-EOPNOTSUPP) -#define cpu_up(x) (-EOPNOTSUPP) +#ifdef CONFIG_SMP +int smp_suspend(void); +void smp_resume(void); +#else +#define smp_suspend() (0) +#define smp_resume() ((void)0) #endif - static int __do_suspend(void *ignore) { - int i, j, k, fpp; + int i, j, k, fpp, err; extern unsigned long max_pfn; extern unsigned long *pfn_to_mfn_frame_list_list; extern unsigned long *pfn_to_mfn_frame_list[]; - extern int gnttab_suspend(void); - extern int gnttab_resume(void); extern void time_resume(void); -#ifdef CONFIG_SMP - cpumask_t prev_online_cpus; - int vcpu_prepare(int vcpu); -#endif - - int err = 0; - BUG_ON(smp_processor_id() != 0); BUG_ON(in_interrupt()); @@ -110,40 +104,12 @@ static int __do_suspend(void *ignore) return -EOPNOTSUPP; } -#if defined(CONFIG_SMP) && !defined(CONFIG_HOTPLUG_CPU) - if (num_online_cpus() > 1) { - printk(KERN_WARNING "Can't suspend SMP guests " - "without CONFIG_HOTPLUG_CPU\n"); - return -EOPNOTSUPP; - } -#endif + err = smp_suspend(); + if (err) + return err; xenbus_suspend(); - lock_cpu_hotplug(); -#ifdef CONFIG_SMP - /* - * Take all other CPUs offline. We hold the hotplug mutex to - * avoid other processes bringing up CPUs under our feet. - */ - cpus_clear(prev_online_cpus); - while (num_online_cpus() > 1) { - for_each_online_cpu(i) { - if (i == 0) - continue; - unlock_cpu_hotplug(); - err = cpu_down(i); - lock_cpu_hotplug(); - if (err != 0) { - printk(KERN_CRIT "Failed to take all CPUs " - "down: %d.\n", err); - goto out_reenable_cpus; - } - cpu_set(i, prev_online_cpus); - } - } -#endif - preempt_disable(); #ifdef __i386__ @@ -153,7 +119,6 @@ static int __do_suspend(void *ignore) __cli(); preempt_enable(); - unlock_cpu_hotplug(); gnttab_suspend(); @@ -203,30 +168,9 @@ static int __do_suspend(void *ignore) xencons_resume(); -#ifdef CONFIG_SMP - for_each_cpu(i) - vcpu_prepare(i); - -#endif - - /* - * Only resume xenbus /after/ we've prepared our VCPUs; otherwise - * the VCPU hotplug callback can race with our vcpu_prepare - */ xenbus_resume(); -#ifdef CONFIG_SMP - out_reenable_cpus: - for_each_cpu_mask(i, prev_online_cpus) { - j = cpu_up(i); - if ((j != 0) && !cpu_online(i)) { - printk(KERN_CRIT "Failed to bring cpu " - "%d back up (%d).\n", - i, j); - err = j; - } - } -#endif + smp_resume(); return err; } @@ -334,7 +278,6 @@ static void shutdown_handler(struct xenbus_watch *watch, kfree(str); } -#ifdef CONFIG_MAGIC_SYSRQ static void sysrq_handler(struct xenbus_watch *watch, const char **vec, unsigned int len) { @@ -360,45 +303,35 @@ static void sysrq_handler(struct xenbus_watch *watch, const char **vec, if (err == -EAGAIN) goto again; - if (sysrq_key != '\0') { +#ifdef CONFIG_MAGIC_SYSRQ + if (sysrq_key != '\0') handle_sysrq(sysrq_key, NULL, NULL); - } -} #endif +} static struct xenbus_watch shutdown_watch = { .node = "control/shutdown", .callback = shutdown_handler }; -#ifdef CONFIG_MAGIC_SYSRQ static struct xenbus_watch sysrq_watch = { .node ="control/sysrq", .callback = sysrq_handler }; -#endif static int setup_shutdown_watcher(struct notifier_block *notifier, unsigned long event, void *data) { - int err1 = 0; -#ifdef CONFIG_MAGIC_SYSRQ - int err2 = 0; -#endif - - err1 = register_xenbus_watch(&shutdown_watch); -#ifdef CONFIG_MAGIC_SYSRQ - err2 = register_xenbus_watch(&sysrq_watch); -#endif + int err; - if (err1) + err = register_xenbus_watch(&shutdown_watch); + if (err) printk(KERN_ERR "Failed to set shutdown watcher\n"); -#ifdef CONFIG_MAGIC_SYSRQ - if (err2) + err = register_xenbus_watch(&sysrq_watch); + if (err) printk(KERN_ERR "Failed to set sysrq watcher\n"); -#endif return NOTIFY_DONE; } diff --git a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c index fb0b822b5c..1f588c59ee 100644 --- a/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c +++ b/linux-2.6-xen-sparse/drivers/xen/core/smpboot.c @@ -79,6 +79,15 @@ EXPORT_SYMBOL(x86_cpu_to_apicid); unsigned int maxcpus = NR_CPUS; #endif +/* + * Set of CPUs that remote admin software will allow us to bring online. + * Notified to us via xenbus. + */ +static cpumask_t xenbus_allowed_cpumask; + +/* Set of CPUs that local admin will allow us to bring online. */ +static cpumask_t local_allowed_cpumask = CPU_MASK_ALL; + void __init prefill_possible_map(void) { int i, rc; @@ -146,7 +155,7 @@ static void cpu_bringup(void) cpu_idle(); } -void vcpu_prepare(int vcpu) +static void vcpu_prepare(int vcpu) { vcpu_guest_context_t ctxt; struct task_struct *idle = idle_task(vcpu); @@ -278,6 +287,8 @@ void __init smp_prepare_cpus(unsigned int max_cpus) vcpu_prepare(cpu); } + xenbus_allowed_cpumask = cpu_present_map; + /* Currently, Xen gives no dynamic NUMA/HT info. */ for (cpu = 1; cpu < NR_CPUS; cpu++) { cpu_sibling_map[cpu] = cpumask_of_cpu(cpu); @@ -301,6 +312,15 @@ void __devinit smp_prepare_boot_cpu(void) cpu_online_map = cpumask_of_cpu(0); } +static int local_cpu_hotplug_request(void) +{ + /* + * We assume a CPU hotplug request comes from local admin if it is made + * via a userspace process (i.e., one with a real mm_struct). + */ + return (current->mm != NULL); +} + #ifdef CONFIG_HOTPLUG_CPU /* @@ -331,8 +351,10 @@ static void vcpu_hotplug(unsigned int cpu) } if (strcmp(state, "online") == 0) { + cpu_set(cpu, xenbus_allowed_cpumask); (void)cpu_up(cpu); } else if (strcmp(state, "offline") == 0) { + cpu_clear(cpu, xenbus_allowed_cpumask); (void)cpu_down(cpu); } else { printk(KERN_ERR "XENBUS: unknown state(%s) on CPU%d\n", @@ -353,6 +375,22 @@ static void handle_vcpu_hotplug_event( } } +static int smpboot_cpu_notify(struct notifier_block *notifier, + unsigned long action, void *hcpu) +{ + int cpu = (long)hcpu; + + /* + * We do this in a callback notifier rather than __cpu_disable() + * because local_cpu_hotplug_request() does not work in the latter + * as it's always executed from within a stopmachine kthread. + */ + if ((action == CPU_DOWN_PREPARE) && local_cpu_hotplug_request()) + cpu_clear(cpu, local_allowed_cpumask); + + return NOTIFY_OK; +} + static int setup_cpu_watcher(struct notifier_block *notifier, unsigned long event, void *data) { @@ -360,7 +398,8 @@ static int setup_cpu_watcher(struct notifier_block *notifier, static struct xenbus_watch cpu_watch = { .node = "cpu", - .callback = handle_vcpu_hotplug_event }; + .callback = handle_vcpu_hotplug_event, + .flags = XBWF_new_thread }; (void)register_xenbus_watch(&cpu_watch); if (!(xen_start_info->flags & SIF_INITDOMAIN)) { @@ -375,14 +414,62 @@ static int setup_cpu_watcher(struct notifier_block *notifier, static int __init setup_vcpu_hotplug_event(void) { + static struct notifier_block hotplug_cpu = { + .notifier_call = smpboot_cpu_notify }; static struct notifier_block xsn_cpu = { .notifier_call = setup_cpu_watcher }; + + register_cpu_notifier(&hotplug_cpu); register_xenstore_notifier(&xsn_cpu); + return 0; } arch_initcall(setup_vcpu_hotplug_event); +int smp_suspend(void) +{ + int i, err; + + lock_cpu_hotplug(); + + /* + * Take all other CPUs offline. We hold the hotplug mutex to + * avoid other processes bringing up CPUs under our feet. + */ + while (num_online_cpus() > 1) { + unlock_cpu_hotplug(); + for_each_online_cpu(i) { + if (i == 0) + continue; + err = cpu_down(i); + if (err) { + printk(KERN_CRIT "Failed to take all CPUs " + "down: %d.\n", err); + for_each_cpu(i) + vcpu_hotplug(i); + return err; + } + } + lock_cpu_hotplug(); + } + + return 0; +} + +void smp_resume(void) +{ + int i; + + for_each_cpu(i) + vcpu_prepare(i); + + unlock_cpu_hotplug(); + + for_each_cpu(i) + vcpu_hotplug(i); +} + int __cpu_disable(void) { cpumask_t map = cpu_online_map; @@ -415,6 +502,20 @@ void __cpu_die(unsigned int cpu) #else /* !CONFIG_HOTPLUG_CPU */ +int smp_suspend(void) +{ + if (num_online_cpus() > 1) { + printk(KERN_WARNING "Can't suspend SMP guests " + "without CONFIG_HOTPLUG_CPU\n"); + return -EOPNOTSUPP; + } + return 0; +} + +void smp_resume(void) +{ +} + int __cpu_disable(void) { return -ENOSYS; @@ -429,6 +530,20 @@ void __cpu_die(unsigned int cpu) int __devinit __cpu_up(unsigned int cpu) { + int rc; + + if (local_cpu_hotplug_request()) { + cpu_set(cpu, local_allowed_cpumask); + if (!cpu_isset(cpu, xenbus_allowed_cpumask)) { + printk("%s: attempt to bring up CPU %u disallowed by " + "remote admin.\n", __FUNCTION__, cpu); + return -EBUSY; + } + } else if (!cpu_isset(cpu, local_allowed_cpumask) || + !cpu_isset(cpu, xenbus_allowed_cpumask)) { + return -EBUSY; + } + #ifdef CONFIG_SMP_ALTERNATIVES if (num_online_cpus() == 1) prepare_for_smp(); @@ -436,7 +551,9 @@ int __devinit __cpu_up(unsigned int cpu) xen_smp_intr_init(cpu); cpu_set(cpu, cpu_online_map); - if (HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL) != 0) + + rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL); + if (rc != 0) BUG(); return 0; diff --git a/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c b/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c index f338de1f37..c64d250067 100644 --- a/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c +++ b/linux-2.6-xen-sparse/drivers/xen/pciback/xenbus.c @@ -16,7 +16,7 @@ static struct pciback_device *alloc_pdev(struct xenbus_device *xdev) { struct pciback_device *pdev; - pdev = kmalloc(sizeof(struct pciback_device), GFP_KERNEL); + pdev = kzalloc(sizeof(struct pciback_device), GFP_KERNEL); if (pdev == NULL) goto out; dev_dbg(&xdev->dev, "allocated pdev @ 0x%p\n", pdev); diff --git a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c index 7e343eff21..2cb2e97a87 100644 --- a/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c +++ b/linux-2.6-xen-sparse/drivers/xen/xenbus/xenbus_xs.c @@ -685,6 +685,24 @@ void xs_resume(void) up_write(&xs_state.suspend_mutex); } +static int xenwatch_handle_callback(void *data) +{ + struct xs_stored_msg *msg = data; + + msg->u.watch.handle->callback(msg->u.watch.handle, + (const char **)msg->u.watch.vec, + msg->u.watch.vec_size); + + kfree(msg->u.watch.vec); + kfree(msg); + + /* Kill this kthread if we were spawned just for this callback. */ + if (current->pid != xenwatch_pid) + do_exit(0); + + return 0; +} + static int xenwatch_thread(void *unused) { struct list_head *ent; @@ -707,12 +725,11 @@ static int xenwatch_thread(void *unused) if (ent != &watch_events) { msg = list_entry(ent, struct xs_stored_msg, list); - msg->u.watch.handle->callback( - msg->u.watch.handle, - (const char **)msg->u.watch.vec, - msg->u.watch.vec_size); - kfree(msg->u.watch.vec); - kfree(msg); + if (msg->u.watch.handle->flags & XBWF_new_thread) + kthread_run(xenwatch_handle_callback, + msg, "xenwatch_cb"); + else + xenwatch_handle_callback(msg); } mutex_unlock(&xenwatch_mutex); diff --git a/linux-2.6-xen-sparse/include/xen/gnttab.h b/linux-2.6-xen-sparse/include/xen/gnttab.h index 584a5db355..e9fd1b63ae 100644 --- a/linux-2.6-xen-sparse/include/xen/gnttab.h +++ b/linux-2.6-xen-sparse/include/xen/gnttab.h @@ -110,6 +110,9 @@ void gnttab_grant_foreign_transfer_ref(grant_ref_t, domid_t domid, #define gnttab_map_vaddr(map) ((void *)(map.host_virt_addr)) #endif +int gnttab_suspend(void); +int gnttab_resume(void); + #endif /* __ASM_GNTTAB_H__ */ /* diff --git a/linux-2.6-xen-sparse/include/xen/xenbus.h b/linux-2.6-xen-sparse/include/xen/xenbus.h index 122480584f..6d69963870 100644 --- a/linux-2.6-xen-sparse/include/xen/xenbus.h +++ b/linux-2.6-xen-sparse/include/xen/xenbus.h @@ -55,8 +55,17 @@ struct xenbus_watch /* Callback (executed in a process context with no locks held). */ void (*callback)(struct xenbus_watch *, const char **vec, unsigned int len); + + /* See XBWF_ definitions below. */ + unsigned long flags; }; +/* + * Execute callback in its own kthread. Useful if the callback is long + * running or heavily serialised, to avoid taking out the main xenwatch thread + * for a long period of time (or even unwittingly causing a deadlock). + */ +#define XBWF_new_thread 1 /* A xenbus device. */ struct xenbus_device { -- 2.30.2